miχpods: MOM6#

  • bootstrap error on mean, median?

  • Try daily vs hourly

  • add TAO χpods

  • fix EUC max at 110

Questions:

  • N2 vs N2T

  • match time intervals

  • match vertical resolution and extent

  • restrict TAO S2 to ADCP depth range

  • restrict to top 200m.

Notes:

  • Filtering out MLD makes a big difference!

  • Since we’re working with derivatives does the vertical grid matter (as long as it is coarser than observations)?

References#

Warner & Moum (2019)

Setup#

%load_ext watermark

import datetime
import glob
import os

import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import holoviews as hv
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xgcm

import pump
from pump import mixpods

# Initialize holoviews with the bokeh backend for interactive plots.
hv.notebook_extension('bokeh')

# Don't let dask rechunk large slices automatically; chunking is handled explicitly below.
dask.config.set({"array.slicing.split_large_chunks": False})
plt.rcParams["figure.dpi"] = 140 
plt.style.use("bmh")
# keep_attrs so CF metadata survives arithmetic; compact reprs for the large datasets below.
xr.set_options(keep_attrs=True, display_expand_data=False)

gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/"  # MITgcm output directory
stationdirname = gcmdir

%watermark -iv
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
xgcm         : 0.6.0
dask         : 2022.7.0
json         : 2.0.9
pump         : 0.1
cf_xarray    : 0.7.4
hvplot       : 0.8.0
numpy        : 1.22.4
distributed  : 2022.7.0
matplotlib   : 3.5.2
sys          : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]
holoviews    : 1.14.9
ipywidgets   : 7.7.1
pandas       : 1.4.3
ncar_jobqueue: 2021.4.14
dcpy         : 0.1.dev385+g121534c
dask_jobqueue: 0.7.3
xarray       : 2022.6.0rc0
flox         : 0.5.9
import ncar_jobqueue

# Tear down any client/cluster left over from a previous execution of this cell
# so repeated runs don't leak PBS jobs.
if "client" in locals():
    client.close()
    del client
if "cluster" in locals():
    cluster.close()

#env = {"OMP_NUM_THREADS": "3", "NUMBA_NUM_THREADS": "3"}

# cluster = distributed.LocalCluster(
#    n_workers=8,
#    threads_per_worker=1,
#    env=env
# )

if "cluster" in locals():
    del cluster

#cluster = ncar_jobqueue.NCARCluster(
#    project="NCGD0011",
#    scheduler_options=dict(dashboard_address=":9797"),
#)
# cluster = dask_jobqueue.PBSCluster(
#    cores=9, processes=9, memory="108GB", walltime="02:00:00", project="NCGD0043",
#    env_extra=env,
# )

import dask_jobqueue

# NOTE(review): `memory` (12GB — what dask believes each worker has) disagrees with
# `resource_spec` mem=23GB (what PBS actually allocates). Confirm this is deliberate
# headroom rather than a copy/paste slip.
cluster = dask_jobqueue.PBSCluster(
    cores=1, # The number of cores you want
    memory='12GB', # Amount of memory
    processes=1, # How many processes
    queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR', # Use your local directory
    resource_spec='select=1:ncpus=1:mem=23GB', # Specify resources
    project='ncgd0011', # Input your project ID here
    walltime='02:00:00', # Amount of wall time
    interface='ib0', # Interface to use
)

# Autoscale between 2 and 12 single-core PBS jobs depending on load.
cluster.adapt(minimum_jobs=2, maximum_jobs=12)

client = distributed.Client(cluster)
dask.config.set(scheduler=client)
client

Client

Client-aef1eb37-14d1-11ed-8cbb-ac1f6bc9f55e

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status

Cluster Info

Read data#

TAO#

# Load the gridded TAO mooring dataset with ancillary fields, restricted to
# 0N, 140W and 1996 onwards.
tao_gridded = (
    xr.open_dataset(
        os.path.expanduser("~/work/pump/zarrs/tao-gridded-ancillary.zarr"), chunks="auto", engine="zarr"
    )
    .sel(longitude=-140, time=slice("1996", None))
)
tao_gridded["depth"].attrs["axis"] = "Z"  # mark depth as the vertical axis for cf-xarray
tao_gridded["shallowest"].attrs.clear()
# Derives N2/S2/Ri/ENSO masks etc. (see pump.mixpods.prepare; the repr below
# shows the added variables).
tao_gridded = mixpods.prepare(tao_gridded, oni=pump.obs.process_oni())

tao_gridded.u.cf.plot()
tao_gridded.eucmax.plot()

# Attach the joint N2T-S2 histogram (split by ENSO-transition phase) and
# persist it on the cluster for reuse.
tao_gridded = (
    tao_gridded.update({
        "n2s2pdf": mixpods.pdf_N2S2(
            tao_gridded[["S2", "N2T"]].drop_vars(["shallowest", "zeuc"])
        ).persist()
    }
    )
)
tao_gridded
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 58. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 139586. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
<xarray.Dataset>
Dimensions:                (time: 212302, depth: 61, N2T_bins: 29, S2_bins: 29,
                            enso_transition_phase: 7)
Coordinates: (12/19)
    deepest                (time) float64 dask.array<chunksize=(212302,), meta=np.ndarray>
  * depth                  (depth) float64 -300.0 -295.0 -290.0 ... -5.0 0.0
    eucmax                 (time) float64 dask.array<chunksize=(33130,), meta=np.ndarray>
    latitude               float32 0.0
    longitude              float32 -140.0
    mld                    (time) float64 dask.array<chunksize=(212302,), meta=np.ndarray>
    ...                     ...
    cool_mask              (time) bool False False False ... False False False
    enso_transition        (time) <U12 'La-Nina warm' ... '____________'
  * N2T_bins               (N2T_bins) object (-5.0, -4.896551724137931] ... (...
  * S2_bins                (S2_bins) object (-5.0, -4.896551724137931] ... (-...
  * enso_transition_phase  (enso_transition_phase) object 'none' ... 'all'
    bin_areas              (N2T_bins, S2_bins) float64 0.0107 0.0107 ... 0.0107
Data variables: (12/14)
    N2                     (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    N2T                    (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    Ri                     (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    Rig_T                  (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    S                      (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    S2                     (time, depth) float32 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    ...                     ...
    densT                  (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    theta                  (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    u                      (time, depth) float32 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    v                      (time, depth) float32 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    shred2                 (time, depth) float64 dask.array<chunksize=(33130, 61), meta=np.ndarray>
    n2s2pdf                (N2T_bins, S2_bins, enso_transition_phase) float64 dask.array<chunksize=(29, 29, 1), meta=np.ndarray>
_images/mixpods-mom6_6_2.png
tao_Ri = xr.load_dataarray("tao-hourly-Ri-seasonal-percentiles.nc").cf.guess_coord_axis()

MITgcm stations#

# Read MITgcm "station" (mooring-like) output from the 1/20 degree run.
stations = pump.model.read_stations_20(stationdirname)

gcmeq = stations.sel(longitude=[-155.025, -140.025, -125.025, -110.025], method="nearest")
#enso = pump.obs.make_enso_mask()
#mitgcm["enso"] = enso.reindex(time=mitgcm.time.data, method="nearest")

#gcmeq["eucmax"] = pump.calc.get_euc_max(gcmeq.u)
# pump.calc.calc_reduced_shear(gcmeq)

#oni = pump.obs.process_oni()
#gcmeq["enso_transition"] = mixpods.make_enso_transition_mask(oni).reindex(time=gcmeq.time.data, method="nearest")


# Focus on the 140W station; normalize_z makes depth negative and sorted
# (the repr below shows depth ascending from -248.8 to -1.25).
mitgcm = gcmeq.sel(longitude=-140.025, method="nearest")
metrics = mixpods.normalize_z(pump.model.read_metrics(stationdirname), sort=True)
mitgcm = mixpods.normalize_z(mitgcm, sort=True)

# xgcm grid with cell centers on `depth` and interfaces on `RF`;
# drF/drC are the vertical distance metrics.
mitgcm_grid = xgcm.Grid(
    metrics.sel(latitude=mitgcm.latitude, longitude=mitgcm.longitude, method="nearest"),
    coords=({"Z": {"center": "depth", "outer": "RF"}, "Y": {"center": "latitude"}}),
    metrics={"Z": ("drF", "drC")},
    periodic=False,
    boundary="fill",
    fill_value=np.nan,
)

mitgcm.theta.attrs["standard_name"]  = "sea_water_potential_temperature"

# Derive N2T/S2/Rig_T etc., restrict to the equator and top 250 m, then attach
# the N2-S2 joint histogram and load eagerly on the cluster.
mitgcm = mixpods.prepare(mitgcm, grid=mitgcm_grid, oni=pump.obs.process_oni()).sel(latitude=0, method="nearest").cf.sel(Z=slice(-250, 0))
mitgcm = mitgcm.update({"n2s2pdf": mixpods.pdf_N2S2(mitgcm)}).load(scheduler=client)
mitgcm
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask/array/core.py:4700: PerformanceWarning: Increasing number of chunks by factor of 37
  result = blockwise(
<xarray.Dataset>
Dimensions:                (depth: 100, time: 174000, RF: 101, N2T_bins: 29,
                            S2_bins: 29, enso_transition_phase: 7)
Coordinates: (12/15)
  * depth                  (depth) float32 -248.8 -246.2 -243.8 ... -3.75 -1.25
    latitude               float64 0.025
    longitude              float64 -140.0
  * time                   (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018...
  * RF                     (RF) float64 -250.0 -247.5 -245.0 ... -5.0 -2.5 0.0
    eucmax                 (time) float32 -123.8 -123.8 -123.8 ... -133.8 -133.8
    ...                     ...
    cool_mask              (time) bool True True True True ... False False False
    enso_transition        (time) <U12 'La-Nina cool' ... 'El-Nino warm'
  * N2T_bins               (N2T_bins) object (-5.0, -4.896551724137931] ... (...
  * S2_bins                (S2_bins) object (-5.0, -4.896551724137931] ... (-...
  * enso_transition_phase  (enso_transition_phase) object 'none' ... 'all'
    bin_areas              (N2T_bins, S2_bins) float64 0.0107 0.0107 ... 0.0107
Data variables: (12/26)
    DFrI_TH                (depth, time) float32 -87.56 -82.49 ... 0.0 0.0
    KPPdiffKzT             (depth, time) float32 0.0001761 0.000164 ... 0.0 0.0
    KPPg_TH                (depth, time) float32 -0.0 0.0 0.0 ... 0.0 0.0 0.0
    KPPhbl                 (time) float32 3.257 6.129 7.671 ... 5.686 12.78
    KPPviscAz              (depth, time) float32 0.0001851 0.000173 ... 0.0 0.0
    SSH                    (time) float32 0.4105 0.3855 0.407 ... 0.5106 0.5155
    ...                     ...
    dTdt                   (depth, time) float64 1.22 1.233 ... -44.36 -31.24
    N2T                    (RF, time) float64 2.86e-05 2.929e-05 ... nan nan
    S2                     (RF, time) float32 nan nan nan nan ... nan nan nan
    shred2                 (RF, time) float64 -8.363e-05 -8.621e-05 ... nan nan
    Rig_T                  (RF, time) float64 0.9299 0.9463 0.9597 ... nan nan
    n2s2pdf                (N2T_bins, S2_bins, enso_transition_phase) float64 ...
Attributes:
    easting:   longitude
    northing:  latitude
    title:     Station profile, index (i,j)=(1201,240)
# Quick look: zonal velocity section, mixed-layer depth, and EUC-core depth.
mitgcm.u.cf.plot()
mitgcm.mldT.reset_coords(drop=True).cf.plot()
mitgcm.eucmax.reset_coords(drop=True).cf.plot()
[<matplotlib.lines.Line2D at 0x2b671b596740>]
_images/mixpods-mom6_10_1.png
mixpods.plot_n2s2pdf(mitgcm.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b65e58af010>
_images/mixpods-mom6_11_1.png

MOM6 run : calculate ONI#

dirname = "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/hist"
# Static grid file (geolon/geolat etc.) plus the surface-field history files.
static = xr.open_dataset(*glob.glob(f"{dirname}/*static*.nc"))
sfc = xr.open_mfdataset(
    sorted(glob.glob(f"{dirname}/*sfc*")),
    coords="minimal",
    data_vars="minimal",
    compat="override",
    use_cftime=True,
    parallel=True,
)
# Shift model years (starting at year 1) onto calendar years starting 1958.
# NOTE(review): days=365*1957 is only exact for a noleap calendar — confirm;
# the commented YearBegin alternative below would be calendar-agnostic.
sfc["time"] = sfc.time + datetime.timedelta(days=365*1957)
#sfc["time"] = sfc.time + xr.coding.cftime_offsets.YearBegin(1957)
sfc.coords.update(static.drop("time"))
#sfc["tos"].attrs["coordinates"] = "geolon geolat"
sst = sfc.cf["sea_surface_temperature"]
sst.cf
# Calculate a monthly average sea surface temperature in the Nino 3.4 region (5°S-5°N, 170°W-120°W).
monthly_ = (
    sst.cf.sel(latitude=slice(-5, 5), longitude=slice(-170, -120))
    .cf.mean(["X", "Y"])
    .resample(time="M")
    .mean()
    #.sel(time=slice("1960", "1999-12-31"))
    #.reindex(
    #    time=xr.cftime_range("1955-01-01", monthly.time[-1].dt.strftime("%Y-%m-%d").data.item(), freq="MS")
    #)
    .load()
)

# Pad the record back to 1956 and convert to the standard (gregorian) calendar
# so it can be compared directly against the observed ONI.
monthly = (
    monthly_
    .reindex(
        time=xr.cftime_range(
            "1956-01-01",
            monthly_.time[-1].dt.strftime("%Y-%m-%d").data.item(), 
            freq="M", 
            calendar=monthly_.time.dt.calendar,
        )
    )
    .convert_calendar("gregorian", use_cftime=False)
)
#monthly = monthly_
monthly.plot()
[<matplotlib.lines.Line2D at 0x2b65a25e4b50>]
_images/mixpods-mom6_15_1.png
# Overlay observed vs MOM6 ONI and their ENSO-transition masks.
# NOTE(review): `oniobs` and `onimom6` are not defined in the cells shown —
# presumably computed (e.g. via mixpods.calc_oni) in a cell that was removed; verify.
(
    oniobs.hvplot.line(x="time", label="obs")
    * onimom6.hvplot.line(x="time", label="MOM6")
).opts(ylabel="ONI [°C]")
oniobs.enso_transition_mask.plot()
onimom6.enso_transition_mask.plot(color='r')
[<matplotlib.lines.Line2D at 0x2b6d7fd20eb0>]
_images/mixpods-mom6_17_1.png

MOM6 sections#

import mom6_tools
import xgcm
from mom6_tools.sections import combine_nested, read_raw_files
from mom6_tools.wright_eos import wright_eos


def combine_variables_by_coords(dsets):
    """Combine per-tile datasets into one dataset, one variable at a time.

    For every data variable present in any input dataset, gather the pieces
    holding that variable, combine them with ``xr.combine_by_coords``
    (skipping tiles with a zero-length time axis), and finally merge the
    per-variable results.

    Parameters
    ----------
    dsets : list of xr.Dataset
        Tile datasets that share (possibly partial) coordinates.

    Returns
    -------
    xr.Dataset
        The combined dataset containing the union of all variables.
    """
    import itertools

    import tqdm

    # Union of variable names across all tiles.
    all_vars = set(itertools.chain(*[ds.data_vars for ds in dsets]))

    merged = xr.merge(
        xr.combine_by_coords(
            # Guard with `var in ds`: since all_vars is a union, a variable
            # may be absent from some tiles and ds[var] would raise KeyError.
            [ds[var] for ds in dsets if ds.sizes["time"] > 0 and var in ds],
            combine_attrs="override",
        )
        for var in tqdm.tqdm(all_vars)
    )

    return merged
import glob

# dirname = "/glade/scratch/gmarques/gmom.e23.GJRAv3.TL319_t061_zstar_N65.mixpods.001/run"

dirname = "/glade/scratch/dcherian/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/run/"
files = sorted(glob.glob(f"{dirname}/*TAO*140W*.nc.*"))

lons = [140]  # [90, 110, 140, 155, 170]  # TODO: Add 125
dsets = read_raw_files(files, parallel=True)
combined = combine_variables_by_coords(dsets)

#mom6tao = read_tao_sections(dirname)

def interp_to_center(ds):
    """Average velocity-point (``xq``) variables onto tracer points (``xh``).

    Selects the pair of ``xq`` faces bracketing the mooring longitude
    (indices 1 and 2), averages them, and relabels the result onto the
    nearest ``xh`` tracer point (within 0.05 degrees), so every variable
    ends up on the tracer-point x-coordinate.
    """
    # For multiple longitudes the face indices would follow the pattern
    # [1, 2, 5, 6, 8, 9, 12, 13, 16, 17] (the previously dead ix0/ix1/interleave
    # computation); only a single longitude is extracted here.
    out = (
        ds
        .isel(xq=[1, 2])
        .coarsen(xq=2).mean()
    )
    # Align the averaged faces with the nearest tracer-point longitude.
    out = out.sel(xh=out.xq.data, method="nearest", tolerance=0.05)

    # Relabel xq-dimensioned variables onto xh, then drop the leftover coords.
    for var in out:
        if "xq" in out[var].dims:
            out[var] = out[var].rename({"xq": "xh"}).drop("xh")
    return out.drop_vars("xq")


# Average velocity points onto tracer points and rechunk so the whole
# meridional (Y) dimension is a single chunk.
mom6tao = interp_to_center(combined).cf.chunk({"Y": -1})
# Potential density at p=0 from the Wright EOS (see mom6_tools.wright_eos).
mom6tao["dens"] = wright_eos(mom6tao.thetao, mom6tao.so, 0)
mom6tao["dens"].attrs.update(
    {"units": "kg/m^3", "standard_name": "sea_water_potential_density"}
)
mom6tao
100%|██████████| 21/21 [02:21<00:00,  6.76s/it]
<xarray.Dataset>
Dimensions:           (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
                       yq: 49)
Coordinates:
  * time              (time) object 0001-01-06 00:30:00 ... 0056-01-05 23:30:00
  * yh                (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
  * xh                (xh) float64 -140.0
  * zi                (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
  * zl                (zl) float64 1.25 3.75 6.25 ... 5.627e+03 5.876e+03
  * nv                (nv) float64 1.0 2.0
  * yq                (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
Data variables: (12/22)
    average_T2        (time) object dask.array<chunksize=(8640,), meta=np.ndarray>
    KPP_kheat         (time, zi, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    volcello          (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    net_heat_surface  (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    Kv_u              (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    time_bnds         (time, nv) timedelta64[ns] dask.array<chunksize=(8640, 2), meta=np.ndarray>
    ...                ...
    zos               (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    KPP_OBLdepth      (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    vo                (time, zl, yq, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
    average_DT        (time) timedelta64[ns] dask.array<chunksize=(8640,), meta=np.ndarray>
    tauy              (time, yq, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
    dens              (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>

Save#

# Rechunk to ~1 year of hourly samples per chunk and save the mooring
# extraction to zarr for fast re-reads in later sessions.
mom6tao.chunk({"time": 24*365}).to_zarr(
    f"/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
    mode="w",
    consolidated=True,
)

Read sections#

# Re-open the saved mooring zarr (drop averaging-window bookkeeping variables).
mom6tao = (
    xr.open_dataset(
        "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
        engine="zarr",
        use_cftime=True,
    )
    .drop_vars(["average_T1", "average_T2"])
    .chunk("auto")
)
# Convert the noleap CFTimeIndex to a pandas DatetimeIndex (emits the
# RuntimeWarning seen below; acceptable since we only composite in time).
mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
from mom6_tools.wright_eos import wright_eos

# Temperature-only potential density (salinity held at 35), used for N2T.
mom6tao["densT"] = wright_eos(mom6tao.thetao, 35, 0)
mom6tao["densT"].attrs.update({"standard_name": "sea_water_potential_density", "units": "kg/m3"})

# Unfortunately have to do this here so that the Grid is right.
mom6tao = mixpods.normalize_z(mom6tao, sort=True)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/coding/times.py:360: FutureWarning: Index.ravel returning ndarray is deprecated; in a future version this will return a view on self.
  sample = dates.ravel()[0]
/glade/scratch/dcherian/tmp/ipykernel_249019/2807442231.py:10: RuntimeWarning: Converting a CFTimeIndex with dates from a non-standard calendar, 'noleap', to a pandas.DatetimeIndex, which uses dates from the standard calendar.  This may lead to subtle errors in operations that depend on the length of time between dates.
  mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
# xgcm grid for the MOM6 mooring: zl = layer centers, zi = interfaces;
# layer thickness `h` provides the vertical metric.
grid = xgcm.Grid(
    mom6tao,
    coords={"Z": {"outer": "zi", "center": "zl"}, 
            "X": {"center": "xh"},
            "Y": {"center": "yh", "left": "yq"},
           },
    periodic=False,
    boundary="fill",
    fill_value=np.nan,
    metrics = {("Z",): "h"}, 
)
grid
<xgcm.Grid>
Z Axis (not periodic, boundary='fill'):
  * outer    zi --> center
  * center   zl --> outer
X Axis (not periodic, boundary='fill'):
  * center   xh
Y Axis (not periodic, boundary='fill'):
  * center   yh --> left
  * left     yq --> center
mom6tao = mixpods.prepare(mom6tao, grid, monthly)
mom6tao
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/cf_xarray/accessor.py:1638: UserWarning: Variables {'areacello'} not found in object but are referred to in the CF attributes.
  warnings.warn(
<xarray.Dataset>
Dimensions:           (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
                       yq: 49)
Coordinates: (12/13)
  * nv                (nv) float64 1.0 2.0
  * time              (time) datetime64[ns] 1958-01-06T00:30:00 ... 2013-01-0...
  * xh                (xh) float64 -140.0
  * yh                (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
  * yq                (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
  * zi                (zi) float64 -6e+03 -5.751e+03 -5.503e+03 ... -2.5 -0.0
    ...                ...
    eucmax            (time, xh) float64 dask.array<chunksize=(9612, 1), meta=np.ndarray>
    oni               (time) float32 nan nan nan nan nan ... nan nan nan nan nan
    warm_mask         (time) bool True True True True ... True True True True
    cool_mask         (time) bool False False False False ... False False False
    enso_transition   (time) <U12 '____________' ... '____________'
    mldT              (time, yh, xh) float64 dask.array<chunksize=(9612, 49, 1), meta=np.ndarray>
Data variables: (12/25)
    KPP_OBLdepth      (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
    KPP_buoyFlux      (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    KPP_kheat         (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    Kd_heat           (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
    Kv_u              (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
    SW                (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
    ...                ...
    zos               (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
    densT             (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
    N2T               (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 49, 1), meta=np.ndarray>
    S2                (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
    shred2            (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
    Rig_T             (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
# Restrict to 0N, 140W and the top 250 m; attach the N2-S2 joint histogram
# and eagerly load the small variables used for plotting.
mom6140 = mom6tao.cf.sel(longitude=-140, latitude=0, method="nearest")
mom6140 = mom6140.cf.sel(Z=slice(-250, 0))
mom6140 = mom6140.update(
    {"n2s2pdf": mixpods.pdf_N2S2(mom6140[["S2", "N2T", "eucmax"]])}
)
mom6140 = mom6140.update(
    mom6140[["uo", "eucmax", "mldT", "n2s2pdf", "S2", "N2T"]].load()
)
mom6140.uo.cf.plot(robust=True)
mom6140.eucmax.cf.plot()
# NOTE(review): the dataset carries `mldT` (loaded just above), not `mld` —
# confirm `mom6140.mld` resolves here; it looks like it should be `mom6140.mldT`.
mom6140.mld.cf.plot(lw=0.5, color='k')
mixpods.plot_n2s2pdf(mom6140.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b6716e6b5e0>
_images/mixpods-mom6_33_1.png

Pick simulations#

# Compare the vertical grids of the three products by plotting the depth
# coordinate of zonal velocity as a dotted profile for each.
datasets = {"TAO": tao_gridded, "MITgcm": mitgcm, "MOM6": mom6140}
for label, dataset in datasets.items():
    zonal_velocity = dataset.cf["sea_water_x_velocity"]
    depths = zonal_velocity.cf["Z"]
    depths.plot(marker='.', ls="none", label=label)
plt.legend()
<matplotlib.legend.Legend at 0x2b660b5312d0>
_images/mixpods-mom6_36_1.png

Compare EUC maximum and MLD#

# Time series of MLD and EUC-core depth across datasets, with TAO as the reference.
mixpods.plot_timeseries(datasets, "mldT", obs="TAO")
datasets["TAO"].eucmax.attrs["long_name"] = "EUC maximum"
mixpods.plot_timeseries(datasets, "eucmax", obs="TAO")

Mean profiles: mean, std#

Model mean S2 is lower by a factor of 2

datasets["TAO"].shred2.cf.mean("time").plot()
[<matplotlib.lines.Line2D at 0x2b66ed7b43d0>]
_images/mixpods-mom6_41_1.png
# Shared hvplot options: profiles are plotted sideways (invert_axes), depth on the frame's long side.
kwargs = dict(show_grid=True, invert_axes=True, legend_position="right", frame_width=300, frame_height=500, 
              ylim=(-1e-3, 1.9e-3), xlim=(-250, 0))

# Profiles of S² and 4N² (Ri = N²/S² is critical where S² = 4N²), plus the
# time-median gradient Richardson number with the 0.25 criticality line.
# plot_profile_fill presumably draws a central profile with a filled spread — see mixpods.
S2 = mixpods.map_hvplot(lambda ds, name: mixpods.plot_profile_fill(ds.S2.load(), name), datasets).opts(**kwargs, ylabel="S^2")
N2 = mixpods.map_hvplot(lambda ds, name: mixpods.plot_profile_fill(4 * ds.N2T.load(), name), datasets).opts(**kwargs, ylabel="4N^2")
Ri = (
    mixpods.map_hvplot(lambda ds, name: mixpods.cfplot(ds.Rig_T.load().median("time"), name), datasets)
    * hv.HLine(0.25).opts(color='k', line_width=0.5)
).opts(**kwargs, ylabel="median Ri_g^T").opts(ylim=(0,1))
(S2 + N2 + Ri)

Remap to EUC coordinate#

gcmeq.coords["zeuc"] = gcmeq.depth - gcmeq.eucmax
remapped = flox.xarray.xarray_reduce(
    gcmeq.drop_vars(["SSH", "KPPhbl", "mld", "eucmax"], errors="ignore"),
    "zeuc",
    dim="depth",
    func="mean",
    expected_groups=(np.arange(-250, 250.1, 5),),
    isbin=(True,),
)
remapped["zeuc_bins"].attrs["units"] = "m"
remapped
<xarray.Dataset>
Dimensions:          (time: 174000, longitude: 4, zeuc_bins: 100)
Coordinates:
    latitude         float64 0.025
  * longitude        (longitude) float64 -155.0 -140.0 -125.0 -110.0
  * time             (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06...
    cool_mask        (time) bool True True True True ... False False False False
    warm_mask        (time) bool False False False False ... True True True True
  * zeuc_bins        (zeuc_bins) object (-250.0, -245.0] ... (245.0, 250.0]
Data variables: (12/25)
    DFrI_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPdiffKzT       (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPg_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPviscAz        (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Um         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Vm         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    ...               ...
    S2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shear            (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    N2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shred2           (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    Ri               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    enso_transition  (zeuc_bins, time) <U12 'La-Nina cool' ... 'El-Nino warm'
Attributes:
    easting:   longitude
    northing:  latitude
    title:     Station profile, index (i,j)=(1201,240)
remapped = remapped.persist()
cluster.scale(6)

Seasonal median Ri: (0, 140)#

# Reset longitudes to nominal values so they match the TAO percentile file coordinates.
remapped["longitude"] = [-155.0, -140, -125, -110]
fg = (
    remapped.Ri.groupby("time.season").median()
    .sel(season=["DJF", "MAM", "JJA", "SON"], longitude=[-140, -110])
    .cf.plot(col="season", row="longitude", xlim=(0,1.5), ylim=(-20, None), label="MITgcm")
)
# HACK: swap the FacetGrid's data for the TAO medians so map_dataarray_line
# overlays observations on the same facets.
fg.data = tao_Ri.cf.sel(quantile=0.5, longitude=[-140, -110])
fg.map_dataarray_line(xr.plot.line, x=None, y="zeuc", hue="season", color='k', label="TAO")
fg.map(lambda: plt.axvline(0.25))
fg.axes[-1, -1].legend(bbox_to_anchor=(1.5, 1))
<matplotlib.legend.Legend at 0x2add93b58220>
_images/mixpods-mom6_51_1.png

Stability diagram: 4N2-S2 plane#

Warner & Moum (2019):

Both velocity and temperature are hourly averaged before interpolation to 5-m vertical bins

Contours enclose 50% of data

mixpods.plot_stability_diagram_by_dataset(datasets)
_images/mixpods-mom6_53_0.png

Questions:

  1. internal wave spectrum?

  2. minor axis is smaller for model, particularly MITgcm → too diffusive?

  3. Can we think of this as the model relaxing to critical Ri too quickly

mixpods.plot_stability_diagram_by_phase(datasets, obs="TAO", fig=None)
_images/mixpods-mom6_55_0.png

Composed#

excluding MLD#

  • have not matched vertical grid spacing yet.

  • minor axis is smaller for increasing shear with KPP

  • prandtl number

  • interpolate to TAO grid

  • hybrid HYCOM-like case may have equatorial refinement of vertical grid above EUC

  • compare hybrid coordinate case resolution

# Composed figure: stability diagram by dataset (top) and by ENSO phase (bottom).
fig = plt.figure(constrained_layout=True, figsize=(12, 6))
subfigs = fig.subfigures(2, 1, height_ratios=[1.2, 1])
top = subfigs[0].subfigures(1, 3, width_ratios=[1, 5, 1])

mixpods.plot_stability_diagram_by_dataset(datasets, fig=top[1])
mixpods.plot_stability_diagram_by_phase(datasets, fig=subfigs[1])
_images/mixpods-mom6_57_0.png

including MLD#

# NOTE(review): this cell is byte-identical to the "excluding MLD" cell and
# `datasets` is not modified in between — the two figures will be the same
# unless an MLD filtering/unfiltering step is missing here. Verify.
fig = plt.figure(constrained_layout=True, figsize=(12, 6))
subfigs = fig.subfigures(2, 1, height_ratios=[1.2, 1])
top = subfigs[0].subfigures(1, 3, width_ratios=[1, 5, 1])

mixpods.plot_stability_diagram_by_dataset(datasets, fig=top[1])
mixpods.plot_stability_diagram_by_phase(datasets, fig=subfigs[1])
_images/mixpods-mom6_59_0.png
# Marginal distributions of the joint N²-S² histogram by ENSO-transition phase.
# NOTE(review): the TAO dataset repr earlier shows bin dims named N2T_bins/S2_bins —
# confirm that summing over "N2_bins" here resolves rather than raising.
(   
    tao_gridded.n2s2pdf
    .sel(enso_transition_phase=["La-Nina cool", "La-Nina warm", "El-Nino warm", "El-Nino cool"])
    .sum("N2_bins")
    .plot(hue="enso_transition_phase")
)
plt.figure()
(
    tao_gridded.n2s2pdf
    .sel(enso_transition_phase=["La-Nina cool", "La-Nina warm", "El-Nino warm", "El-Nino cool"])
    .sum("S2_bins")
    .plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D at 0x2b36e2bc2ad0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc32e0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc3310>,
 <matplotlib.lines.Line2D at 0x2b36e0ca52d0>]
_images/mixpods-mom6_60_1.png _images/mixpods-mom6_60_2.png
# ONI time series and the derived ENSO-transition mask over 2005-2015.
oni = pump.obs.process_oni().sel(time=slice("2005-Oct", "2015"))
(
    oni.hvplot.step(color='k')
+ pump.obs.make_enso_transition_mask().sel(time=slice("2005-Jun", "2015")).reset_coords(drop=True).hvplot.step()
).cols(1)

Seasonal mean heat flux#

remapped.Jq.load()
<xarray.DataArray 'Jq' (time: 174000, zeuc: 100)>
array([[        nan,         nan, -0.07841657, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.07973828, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.08094554, ...,         nan,
                nan,         nan],
       ...,
       [        nan, -0.07447129,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07471326,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07509062,         nan, ...,         nan,
                nan,         nan]])
Coordinates:
    latitude   float64 0.025
    longitude  float64 -140.0
  * time       (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06T17:00:00
  * zeuc       (zeuc) float64 -247.5 -242.5 -237.5 -232.5 ... 237.5 242.5 247.5
import hvplot.xarray
# Seasonal-mean EUC-relative heat-flux profiles, one panel per season.
(
    remapped.Jq.groupby("time.season").mean()
    .reindex(season=["DJF", "MAM", "JJA", "SON"])
    .cf.plot.line(col="season")
)
<xarray.plot.facetgrid.FacetGrid at 0x2affc38316c0>
_images/mixpods-mom6_65_1.png

Test out available variables#

  1. KPP_Kheat is non-zero only in KPP_OBL

(mom6140.Tflx_dia_diff * 1025 * 4000).cf.plot(robust=True)
<matplotlib.collections.QuadMesh at 0x2ba58a988fa0>
_images/mixpods-mom6_67_1.png
mom6140.Kv_u.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
<matplotlib.collections.QuadMesh at 0x2ba58a007cd0>
_images/mixpods-mom6_68_1.png
# Compare median and mean layer-thickness profiles.
mom6140.h.median("time").plot()
mom6140.h.mean("time").plot()
[<matplotlib.lines.Line2D at 0x2ba58a547c10>]
_images/mixpods-mom6_69_1.png
# Total vs KPP-only heat diffusivity, with the KPP boundary-layer depth overlaid.
mom6140.Kd_heat.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
mom6140.KPP_OBLdepth.plot(color='r')

plt.figure()
mom6140.KPP_kheat.cf.plot(norm=mpl.colors.LogNorm(1e-6, 1e-2))
mom6140.KPP_OBLdepth.plot(color='r')

plt.figure()
# NOTE(review): `mpl.cm.mpl.cm.Reds_r` only resolves because matplotlib.cm
# happens to re-import matplotlib as `mpl`; simplify to `mpl.cm.Reds_r`.
(mom6140.KPP_kheat - mom6140.Kd_heat).cf.plot(robust=True, cmap=mpl.cm.mpl.cm.Reds_r, vmax=0)
<matplotlib.collections.QuadMesh at 0x2ba58a8c38b0>
_images/mixpods-mom6_70_1.png _images/mixpods-mom6_70_2.png _images/mixpods-mom6_70_3.png

Experiment with manual combining#

import intake
from intake.source.utils import reverse_format

# Parse year and tile id out of each filename ("...__{year}.nc.{tile}").
# NOTE(review): `pathlib` is not imported in the setup cell shown — confirm it
# is in scope before running this experimental section.
years = []
tiles = []
for file in files[:10]:
    p = pathlib.Path(file)
    params = reverse_format("__{year}.nc.{tile}", p.stem + p.suffix)
    years.append(params["year"])
    tiles.append(params["tile"])
years, tiles

import toolz as tlz

# Group files by tile id and concatenate each tile's files along time.
# NOTE(review): `tileid` is defined further down in this notebook — this cell
# can only run after that definition has been executed.
bytile = {}
for tile, v in tlz.groupby(tileid, files).items():
    bytile[tile] = xr.concat(read_raw_files(v, parallel=True), dim="time")



print("\n".join([hash(ds.yh.data.tolist()) for ds in list(bytile.values())]))

from functools import partial


def hash_coords(ds, axis):
    """Hash the concatenated coordinate values of `ds` along CF axis `axis`.

    Used as a grouping key: datasets whose coordinates along `axis` hash
    equal are assumed to share that axis and can be concatenated along the
    other one.
    """
    coord_arrays = [ds[dim].data for dim in ds.cf.axes[axis]]
    stacked = np.concatenate(coord_arrays)
    return hash(tuple(stacked))


# Experimental: iteratively concatenate tiles that share coordinates along one
# axis, then the other. NOTE(review): `cfconcat` is not defined in the cells
# shown, and the `break` means only the first (X-sharing, concat-along-Y) pass
# actually runs.
grouped = bytile

for axis, concat_axis in [("X", "Y"), ("Y", "X")]:
    grouped = tlz.groupby(partial(hash_coords, axis=axis), grouped.values())
    grouped = {
        k: cfconcat(v, axis=concat_axis, join='exact') for k, v in grouped.items()
    }
    break 
grouped

cfconcat(list(grouped.values()), "X")

combined = xr.combine_by_coords(list(bytile.values()), combine_attrs="override")

def raise_if_bad_index(combined):
    """Validate that every index of `combined` is monotonic and duplicate-free.

    Parameters
    ----------
    combined : xr.Dataset
        Result of combining per-tile files; a non-monotonic or duplicated
        index indicates the tiles were mis-aligned during the combine.

    Raises
    ------
    ValueError
        Naming *all* offending indexes.
    """
    bad = []
    for idx in combined.indexes:
        index = combined.indexes[idx]
        if not index.is_monotonic or index.has_duplicates:
            bad.append(idx)
    if bad:
        # Bug fix: report the collected `bad` names, not the loop variable
        # `idx`, which is just whichever index happened to be checked last.
        raise ValueError(f"Indexes {bad} are either not monotonic or have duplicates")

def tileid(path):
    """Return the integer tile id from a filename like "...nc.0007" -> 7."""
    # Local import: `pathlib` is not imported in the setup cell shown above.
    import pathlib

    p = pathlib.Path(path)
    # The tile id is the final numeric suffix; reverse_format on
    # "__{year}.nc.{tile}" (see the experiment above) would also work.
    return int(p.suffix[1:])
    
# Count files per tile. Fixes: `groupby` was never imported (itertools.groupby
# matches the `(key, group)` unpacking used here; toolz.groupby would not),
# and the loop variable `files` clobbered the outer `files` list — renamed.
from itertools import groupby

sorted_files = sorted(files, key=tileid)
for tile, tile_files in groupby(sorted_files, tileid):
    print(tile, len(list(tile_files)))

Test out ONI calculation: close enough!#

# Sanity-check the ONI computation against ERSSTv5 observations.
ersst = xr.tutorial.open_dataset("ersstv5")

# Nino3.4 monthly-mean SST; ERSST longitudes run 0-360, hence the 360-170/360-120 bounds.
monthlyersst = (
    ersst.sst.cf.sortby("Y")
    .cf.sel(latitude=slice(-5, 5), longitude=slice(360-170, 360-120))
    .cf.mean(["X", "Y"])
    .resample(time="M").mean()
    .load()
)

expected = mixpods.calc_oni(monthlyersst)

actual = pump.obs.process_oni()
actual.plot()
expected.plot()

# Difference between the two ONI estimates: "close enough" per the section heading.
(actual-expected).plot()
[<matplotlib.lines.Line2D at 0x2b6d2ec04580>]
_images/mixpods-mom6_74_1.png